from narrativeNLP.wrappers import get_narratives

import pandas as pd
import pickle as pk
from tqdm import tqdm
import json
import glob

import sys
# Command-line usage: <script> BATCH_ID N_CLUSTERS
#   BATCH_ID   -- integer key used to pick this run's entry in the batches
#                 pickle and to name the SRL input / output files.
#   N_CLUSTERS -- integer used to select the narrative model file
#                 ('narrative_model_<c>_clusters.pk') and to tag the outputs.
args = sys.argv
batch_id, c = int(args[1]), int(args[2])

def read_json_file(path):
    """Load and return the parsed contents of the JSON file at ``path``.

    The file is decoded explicitly as UTF-8, the standard interchange
    encoding for JSON, instead of relying on the platform's locale default.
    This keeps non-ASCII text intact regardless of the machine the script
    runs on.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        The deserialized JSON document (whatever ``json.load`` produces for
        the file's top-level value, e.g. a list or a dict).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(path, encoding='utf-8') as f:
        return json.load(f)

# Load batches
batches = pd.read_pickle('../data/batches.pk')

# Sentence files that are reported as empty and skipped instead of being
# parsed with pandas.
KNOWN_EMPTY_FILES = (
    '../data/gpo_sentences/1994-02-01_528.csv',
    '../data/gpo_sentences/1994-01-31_507.csv',
    '../data/gpo_sentences/1994-02-01_549.csv',
    '../data/gpo_sentences/1994-01-31_430.csv',
)

# Load sentences of the corresponding batch
filenames = batches[batch_id]
list_of_dataframes = []

for filename in tqdm(filenames):
    if filename in KNOWN_EMPTY_FILES:
        print('Empty file.')
        continue

    # Tag every row with the file it came from so that sentences can be
    # traced back to their document after concatenation.
    frame = pd.read_csv(filename)
    frame['doc'] = filename
    list_of_dataframes.append(frame)

# One row per sentence across all files of the batch, with a fresh 0..n-1 index.
sentences_frame = pd.concat(list_of_dataframes, ignore_index=True)
len_batch = len(sentences_frame)

# Downstream code works on two parallel lists: (document names, sentences).
split_sentences = (
    list(sentences_frame['doc']),
    list(sentences_frame['sentence']),
)

# Load SRL
# The SRL output of a batch is spread over several JSON files whose names
# carry the batch id and a start offset (0, size, 2 * size, ...).
size = 10000
srl_path_template = '../data/gpo_srl_annotations/srl_res_small_{0}_{1}.json'
srl_files = [
    srl_path_template.format(batch_id, offset)
    for offset in range(0, len_batch, size)
]

# Concatenate the chunks in offset order.
srl_res = []
for srl_file in srl_files:
    srl_res += read_json_file(srl_file)

# Replace missing (None) SRL entries with an empty result; each replaced
# position gets its own fresh dictionary.
srl_res = [
    {'words': [], 'verbs': []} if entry is None else entry
    for entry in srl_res
]

# Check if sentences and SRL results are of same length
if len(srl_res) != len(split_sentences[1]):
    raise Exception('Sentences and SRL output not of same length!')

# Load the narrative model matching the requested number of clusters.
model_path = '../models/narrative_model_{0}_clusters.pk'.format(c)
narrative_model = pd.read_pickle(model_path)

# Output locations, each tagged with the batch id and the cluster count.
narratives_path = '../data/gpo_narratives/narratives_{0}_{1}.csv'.format(batch_id, c)
postproc_roles_path = '../data/gpo_processed_roles/postproc_roles_{0}_{1}.csv'.format(batch_id, c)
raw_roles_path = '../data/gpo_raw_roles/raw_roles_{0}_{1}.csv'.format(batch_id, c)

# Run narrative extraction on the SRL output, passing the per-sentence
# document names (split_sentences[0]) as the document index.
# NOTE(review): the exact meaning of n_clusters=[0] is defined by
# narrativeNLP.get_narratives -- confirm against that API.
final_statements = get_narratives(
    srl_res=srl_res,
    doc_index=split_sentences[0],
    narrative_model=narrative_model,
    save_to_disk=narratives_path,
    save_postproc_roles=postproc_roles_path,
    save_raw_roles=raw_roles_path,
    n_clusters=[0],
    cluster_labeling='most_frequent',
    progress_bar=True,
)
